utf8proc_uint16_t uppercase_seqindex;
utf8proc_uint16_t lowercase_seqindex;
utf8proc_uint16_t titlecase_seqindex;
+ /**
+ * Character combining table.
+ *
+ * The character combining table is formally indexed by two
+ * characters, the first and second character that might form a
+ * combining pair. The table entry then contains the combined
+ * character. Most character pairs cannot be combined. There are
+ * about 1,000 characters that can be the first character in a
+ * combining pair, and for most of them there are only a handful of
+ * possible second characters.
+ *
+ * The combining table is stored as `utf8proc_uint32_t
+ * utf8proc_combinations[][2]`. That is, for every character that
+ * can be a first combining character, it contains a consecutive
+ * run of pairs `(second combining character, combined character)`.
+ *
+ * - `comb_index`: Index into the combining table if this character
+ * is the first character in a combining pair, else 0x3ff
+ *
+ * - `comb_length`: Number of table entries for this first character
+ *
+ * - `comb_issecond`: As an optimization we also record whether this
+ * character is the second combining character in any pair. If
+ * not, we can skip the table lookup.
+ *
+ * A table lookup starts from a given character pair. It first
+ * checks whether the first character is stored in the table
+ * (i.e. its `comb_index` is not 0x3ff) and whether the second
+ * character can be a second combining character (looking at its
+ * `comb_issecond`). If so, the `comb_length` table entries starting
+ * at `comb_index` will be checked sequentially for a match.
+ */
utf8proc_uint16_t comb_index:10;
utf8proc_uint16_t comb_length:5;
utf8proc_uint16_t comb_issecond:1;